library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
data = read.csv("data- originalchannel4 bigdata.csv", header = T, fill = T) #758916 obs
cat("the total number of IDs in this file is", nrow(data))
## the total number of IDs in this file is 758916
#remove_repeats
data2 = subset(data, repeat. == 0) #695166
cat("the number of non-repeated participants is", nrow(data2))
## the number of non-repeated participants is 695166
#keep males of females
data2 = subset(data2, sex == 1 | sex == 2)#672279
cat("\n")
cat("Of this, the number of participants who are either male or female is", nrow(data2))
## Of this, the number of participants who are either male or female is 672279
##keep age bound
data2 = subset(data2, age > 15 & age < 90)
cat("\n")
cat("Of this, the number participants after all QC is ", nrow(data2))
## Of this, the number participants after all QC is 671606
#recode AQ
data2[,c(51,57,58,60)] <- lapply(data2[,c(51,57,58,60)], function(x)
recode(x,"1" = 1, "2" = 1, "3" = 0, "4" =0 ))
data2[,c(52,53,54,55,56,59)] <- lapply(data2[,c(52,53,54,55,56,59)], function(x)
recode(x,"1" = 0, "2" = 0, "3" = 1, "4" =1))
data2$AQ_full = data2$AQ_1 + data2$AQ_2 + data2$AQ_3 + data2$AQ_4 + data2$AQ_5 + data2$AQ_6 + data2$AQ_7 + data2$AQ_8 + data2$AQ_9 + data2$AQ_10
data2$AQ_Z = scale(data2$AQ_full, center = TRUE, scale = TRUE)
#Recode EQ
data2[,c(31, 32, 34, 36, 39)] <- lapply(data2[,c(31, 32, 34, 36, 39)], function(x)
recode(x,"1" = 2, "2" = 1, "3" = 0, "4" =0 ))
data2[,c(33, 35, 37, 38, 40 )] <- lapply(data2[,c(33, 35, 37, 38, 40)], function(x)
recode(x,"1" = 0, "2" = 0, "3" = 1, "4" =2))
data2$EQ_full = data2$EQ_1 + data2$EQ_2 + data2$EQ_3 + data2$EQ_4 + data2$EQ_5 + data2$EQ_6 + data2$EQ_7 + data2$EQ_8 + data2$EQ_9 + data2$EQ_10
data2$EQ_Z = scale(data2$EQ_full, center = TRUE, scale = TRUE)
#Recode SQ
data2[,c(41,43, 44, 46, 47, 49, 50)] <- lapply(data2[,c(41,43, 44, 46, 47, 49, 50)], function(x)
recode(x,"1" = 2, "2" = 1, "3" = 0, "4" =0 ))
data2[,c(42, 45, 48 )] <- lapply(data2[,c(42, 45, 48)], function(x)
recode(x,"1" = 0, "2" = 0, "3" = 1, "4" =2))
data2$SQ_full = data2$SQR_1 + data2$SQR_2 + data2$SQR_3 + data2$SQR_4 + data2$SQR_5 + data2$SQR_6 + data2$SQR_7 + data2$SQR_8 + data2$SQR_9 + data2$SQR_10
data2$SQ_Z = scale(data2$SQ_full, center = TRUE, scale = TRUE)
#Recode SPQ
data2[,c(21:30)] <- lapply(data2[,c(21:30)], function(x)
recode(x,"1" = 3, "2" = 2, "3" = 1, "4" =0 ))
data2$SPQ_full = data2$SPQ_1 + data2$SPQ_2 + data2$SPQ_3 + data2$SPQ_4 + data2$SPQ_5 + data2$SPQ_6 + data2$SPQ_7 + data2$SPQ_8 + data2$SPQ_9 + data2$SPQ_10
data2$SPQ_Z = scale(data2$SPQ_full, center = TRUE, scale = TRUE)
data2 = data2[!is.na(data2$AQ_full),]
#Define cases
#define cases based on all different options
data2$autism = ifelse(data2$diagnosis_0 == "2" | data2$diagnosis_1 == "2"| data2$diagnosis_3 == "2" | data2$diagnosis_4 == "2" | data2$diagnosis_5 == "2" | data2$diagnosis_6 == "2" | data2$diagnosis_7 == "2" | data2$diagnosis_8 == "2" | data2$autism_diagnosis_0 > 0 | data2$autism_diagnosis_1 > 0 | data2$autism_diagnosis_2 > 0, 1, 0)
data2$autism[is.na(data2$autism)] <- 0
#define cases based only on the autism criteria
data2$autism2 = ifelse(data2$autism_diagnosis_0 > 0 | data2$autism_diagnosis_1 > 0 | data2$autism_diagnosis_2 > 0, 1, 0)
data2$autism2[is.na(data2$autism2)] <- 0
#count the number of cases
a = nrow(subset(data2, autism == 1))
cat("the number of autism cases is (broad criteria)", a)
## the number of autism cases is (broad criteria) 36648
cat("\n")
b = nrow(subset(data2, autism2 == 1))
cat("the number of autism cases is (narrow criteria)", b)
## the number of autism cases is (narrow criteria) 36426
#define schizophrenia
data2$schizophrenia = ifelse(data2$diagnosis_0 == "7" | data2$diagnosis_1 == "7"| data2$diagnosis_3 == "7" | data2$diagnosis_4 == "7" | data2$diagnosis_5 == "7" | data2$diagnosis_6 == "7" | data2$diagnosis_7 == "7" | data2$diagnosis_8 == "7" | data2$autism_diagnosis_0 == "0" | data2$autism_diagnosis_1 == "0" | data2$autism_diagnosis_2 == "0", 1, 0)
data2$schizophrenia[is.na(data2$schizophrenia)] <- 0
#define ocd
data2$ocd = ifelse(data2$diagnosis_0 == "6" | data2$diagnosis_1 == "6"| data2$diagnosis_3 == "6" | data2$diagnosis_4 == "6" | data2$diagnosis_5 == "6" | data2$diagnosis_6 == "6" | data2$diagnosis_7 == "6" | data2$diagnosis_8 == "6" | data2$autism_diagnosis_0 == "0" | data2$autism_diagnosis_1 == "0" | data2$autism_diagnosis_2 == "0", 1, 0)
data2$ocd[is.na(data2$ocd)] <- 0
#define ld
data2$ld = ifelse(data2$diagnosis_0 == "5" | data2$diagnosis_1 == "5"| data2$diagnosis_3 == "5" | data2$diagnosis_4 == "5" | data2$diagnosis_5 == "5" | data2$diagnosis_6 == "5" | data2$diagnosis_7 == "5" | data2$diagnosis_8 == "5" | data2$autism_diagnosis_0 == "0" | data2$autism_diagnosis_1 == "0" | data2$autism_diagnosis_2 == "0", 1, 0)
data2$ld[is.na(data2$ld)] <- 0
#define depression
data2$depression = ifelse(data2$diagnosis_0 == "4" | data2$diagnosis_1 == "4"| data2$diagnosis_3 == "4" | data2$diagnosis_4 == "4" | data2$diagnosis_5 == "4" | data2$diagnosis_6 == "4" | data2$diagnosis_7 == "4" | data2$diagnosis_8 == "4" | data2$autism_diagnosis_0 == "0" | data2$autism_diagnosis_1 == "0" | data2$autism_diagnosis_2 == "0", 1, 0)
data2$depression[is.na(data2$depression)] <- 0
#define bipolar
data2$bipolar = ifelse(data2$diagnosis_0 == "3" | data2$diagnosis_1 == "3"| data2$diagnosis_3 == "3" | data2$diagnosis_4 == "3" | data2$diagnosis_5 == "3" | data2$diagnosis_6 == "3" | data2$diagnosis_7 == "3" | data2$diagnosis_8 == "3" | data2$autism_diagnosis_0 == "0" | data2$autism_diagnosis_1 == "0" | data2$autism_diagnosis_2 == "0", 1, 0)
data2$bipolar[is.na(data2$bipolar)] <- 0
#define adhd
data2$adhd = ifelse(data2$diagnosis_0 == "1" | data2$diagnosis_1 == "1"| data2$diagnosis_3 == "1" | data2$diagnosis_4 == "1" | data2$diagnosis_5 == "1" | data2$diagnosis_6 == "1" | data2$diagnosis_7 == "1" | data2$diagnosis_8 == "6" | data2$autism_diagnosis_0 == "0" | data2$autism_diagnosis_1 == "0" | data2$autism_diagnosis_2 == "0", 1, 0)
data2$ocd[is.na(data2$ocd)] <- 0
#Define other variables
data2$STEM = ifelse(data2$occupation == "3" | data2$occupation == "5" | data2$occupation == "21" , 1, 0)
data2$STEM[is.na(data2$STEM)] <- 0
data2$STEM = ifelse(data2$occupation == "26", 0, data2$STEM)
STEM = subset(data2, STEM == "1")
cat("No of participants in STEM occupation is", nrow(STEM))
## No of participants in STEM occupation is 76267
cat("\n")
###update country region
data2$countryregion = ifelse(data2$countryregion == "14", 0, data2$countryregion)
###update education
data2$education = ifelse(data2$education == "5", 0, data2$education)
###update handedness
data2$handedness = ifelse(data2$handedness == "4", 0, data2$handedness)
#This is Wheelwright's method: https://www.ncbi.nlm.nih.gov/pubmed/16473340
controls = subset(data2, autism == "0")
cases = subset(data2, autism == "1")
meanSQ = mean(controls$SQ_full)
meanEQ = mean(controls$EQ_full)
data2$SQ_full_standardized_w = (data2$SQ_full - meanSQ)/20
data2$EQ_full_standardized_w = (data2$EQ_full - meanEQ)/20
data2$wheelwrightD = data2$SQ_full_standardized_w - data2$EQ_full_standardized_w
data2$dpercentile = ntile(data2$wheelwrightD, 100)
data2$braintype = ifelse(data2$dpercentile < 2.5, "1",
ifelse(between(data2$dpercentile, 2.499, 35), "2",
ifelse(between(data2$dpercentile, 34.99, 65), "3",
ifelse(between(data2$dpercentile, 64.99 , 97.5), "4", "5"))))
controls = subset(data2, autism == "0") #redefining cases and controls again so downstream analysis can include braintype
cases = subset(data2, autism == "1")
males_controls = subset(controls, sex == "1")
females_controls = subset(controls, sex == "2")
dim(males_controls)
## [1] 241355 82
dim(females_controls)
## [1] 393600 82
males_cases = subset(cases, sex == "1")
females_cases = subset(cases, sex == "2")
dim(males_cases)
## [1] 18188 82
dim(females_cases)
## [1] 18460 82
#Basic statistics
# Sex differences
t.test(controls$AQ_full ~ controls$sex)
##
## Welch Two Sample t-test
##
## data: controls$AQ_full by controls$sex
## t = 68.682, df = 501320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.3897157 0.4126117
## sample estimates:
## mean in group 1 mean in group 2
## 3.570429 3.169266
ggplot(controls, aes(x=AQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
t.test(controls$EQ_full ~ controls$sex)
##
## Welch Two Sample t-test
##
## data: controls$EQ_full by controls$sex
## t = -155.09, df = 518030, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.945568 -1.897007
## sample estimates:
## mean in group 1 mean in group 2
## 8.875432 10.796720
ggplot(controls, aes(x=EQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
t.test(controls$SQ_full ~ controls$sex)
##
## Welch Two Sample t-test
##
## data: controls$SQ_full by controls$sex
## t = 122.11, df = 481110, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.263506 1.304729
## sample estimates:
## mean in group 1 mean in group 2
## 6.734478 5.450361
ggplot(controls, aes(x=SQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
t.test(controls$SPQ_full ~ controls$sex)
##
## Welch Two Sample t-test
##
## data: controls$SPQ_full by controls$sex
## t = -57.099, df = 526580, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.8558141 -0.7990105
## sample estimates:
## mean in group 1 mean in group 2
## 13.99455 14.82196
ggplot(controls, aes(x=SPQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
controls2 = controls[,c ("sex", "AQ_full", "EQ_full", "SQ_full", "SPQ_full" )]
controls2 = na.omit(controls2)
controls2_melt = melt(controls2, id.vars=c("sex"))
ddply(controls2_melt, c("sex", "variable"), summarise,
mean = mean(value), sd = sd(value),
sem = sd(value)/sqrt(length(value)))
## sex variable mean sd sem
## 1 1 AQ_full 3.570429 2.278934 0.004638778
## 2 1 EQ_full 8.875432 4.756196 0.009681254
## 3 1 SQ_full 6.734478 4.180116 0.008508640
## 4 1 SPQ_full 13.994552 5.515901 0.011227636
## 5 2 AQ_full 3.169266 2.226789 0.003549372
## 6 2 EQ_full 10.796720 4.849119 0.007729213
## 7 2 SQ_full 5.450361 3.877523 0.006180545
## 8 2 SPQ_full 14.821964 5.747510 0.009161196
t.test(cases$AQ_full ~ cases$sex)
##
## Welch Two Sample t-test
##
## data: cases$AQ_full by cases$sex
## t = 7.4906, df = 36638, p-value = 7.008e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1561934 0.2669022
## sample estimates:
## mean in group 1 mean in group 2
## 4.875632 4.664085
ggplot(cases, aes(x=AQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
t.test(cases$EQ_full ~ cases$sex)
##
## Welch Two Sample t-test
##
## data: cases$EQ_full by cases$sex
## t = -26.415, df = 36550, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.444680 -1.245096
## sample estimates:
## mean in group 1 mean in group 2
## 6.920497 8.265385
ggplot(cases, aes(x=EQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
t.test(cases$SQ_full ~ cases$sex)
##
## Welch Two Sample t-test
##
## data: cases$SQ_full by cases$sex
## t = 20.866, df = 36441, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.8907598 1.0754565
## sample estimates:
## mean in group 1 mean in group 2
## 8.077854 7.094745
ggplot(cases, aes(x=SQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
t.test(cases$SPQ_full ~ cases$sex)
##
## Welch Two Sample t-test
##
## data: cases$SPQ_full by cases$sex
## t = -11.968, df = 36607, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9046750 -0.6500477
## sample estimates:
## mean in group 1 mean in group 2
## 16.33055 17.10791
ggplot(cases, aes(x=SPQ_full, colour=as.character(sex))) + geom_density() + theme_minimal()
cases2 = cases[,c ("sex", "AQ_full", "EQ_full", "SQ_full", "SPQ_full" )]
cases2 = na.omit(cases2)
cases2_melt = melt(cases2, id.vars=c("sex"))
ddply(cases2_melt, c("sex", "variable"), summarise,
mean = mean(value), sd = sd(value),
sem = sd(value)/sqrt(length(value)))
## sex variable mean sd sem
## 1 1 AQ_full 4.875632 2.662900 0.01974524
## 2 1 EQ_full 6.920497 4.710717 0.03492967
## 3 1 SQ_full 8.077854 4.642268 0.03442213
## 4 1 SPQ_full 16.330548 6.272512 0.04651029
## 5 2 AQ_full 4.664085 2.743430 0.02019194
## 6 2 EQ_full 8.265385 5.032808 0.03704201
## 7 2 SQ_full 7.094745 4.371088 0.03217168
## 8 2 SPQ_full 17.107909 6.160577 0.04534251
summary(lm(AQ_full ~ as.character(sex) + as.character(autism) + as.character(sex):as.character(autism), data = data2))
##
## Call:
## lm(formula = AQ_full ~ as.character(sex) + as.character(autism) +
## as.character(sex):as.character(autism), data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8756 -1.5704 -0.1693 1.4296 6.8307
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.570429 0.004629 771.342
## as.character(sex)2 -0.401164 0.005879 -68.234
## as.character(autism)1 1.305203 0.017486 74.644
## as.character(sex)2:as.character(autism)1 0.189616 0.024475 7.747
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.character(sex)2 < 2e-16 ***
## as.character(autism)1 < 2e-16 ***
## as.character(sex)2:as.character(autism)1 9.4e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.274 on 671599 degrees of freedom
## Multiple R-squared: 0.02719, Adjusted R-squared: 0.02718
## F-statistic: 6257 on 3 and 671599 DF, p-value: < 2.2e-16
summary(lm(EQ_full ~ as.character(sex) + as.character(autism) + as.character(sex):as.character(autism), data = data2))
##
## Call:
## lm(formula = EQ_full ~ as.character(sex) + as.character(autism) +
## as.character(sex):as.character(autism), data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7967 -3.7967 0.2033 3.2033 13.0795
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 8.875432 0.009806 905.12
## as.character(sex)2 1.921288 0.012455 154.26
## as.character(autism)1 -1.954935 0.037042 -52.78
## as.character(sex)2:as.character(autism)1 -0.576400 0.051848 -11.12
## Pr(>|t|)
## (Intercept) <2e-16 ***
## as.character(sex)2 <2e-16 ***
## as.character(autism)1 <2e-16 ***
## as.character(sex)2:as.character(autism)1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.817 on 671599 degrees of freedom
## Multiple R-squared: 0.04766, Adjusted R-squared: 0.04765
## F-statistic: 1.12e+04 on 3 and 671599 DF, p-value: < 2.2e-16
summary(lm(SQ_full ~ as.character(sex) + as.character(autism) + as.character(sex):as.character(autism), data = data2))
##
## Call:
## lm(formula = SQ_full ~ as.character(sex) + as.character(autism) +
## as.character(sex):as.character(autism), data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0779 -3.0947 -0.4504 2.5496 14.5496
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 6.734478 0.008193 822.012
## as.character(sex)2 -1.284117 0.010406 -123.406
## as.character(autism)1 1.343375 0.030948 43.407
## as.character(sex)2:as.character(autism)1 0.301009 0.043319 6.949
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.character(sex)2 < 2e-16 ***
## as.character(autism)1 < 2e-16 ***
## as.character(sex)2:as.character(autism)1 3.69e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.025 on 671599 degrees of freedom
## Multiple R-squared: 0.0311, Adjusted R-squared: 0.0311
## F-statistic: 7186 on 3 and 671599 DF, p-value: < 2.2e-16
summary(lm(SPQ_full ~ as.character(sex) + as.character(autism) + as.character(sex):as.character(autism), data = data2))
##
## Call:
## lm(formula = SPQ_full ~ as.character(sex) + as.character(autism) +
## as.character(sex):as.character(autism), data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.1079 -3.9946 0.0054 4.0054 16.0054
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 13.99455 0.01159 1207.809
## as.character(sex)2 0.82741 0.01472 56.223
## as.character(autism)1 2.33600 0.04377 53.370
## as.character(sex)2:as.character(autism)1 -0.05005 0.06126 -0.817
## Pr(>|t|)
## (Intercept) <2e-16 ***
## as.character(sex)2 <2e-16 ***
## as.character(autism)1 <2e-16 ***
## as.character(sex)2:as.character(autism)1 0.414
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.692 on 671599 degrees of freedom
## Multiple R-squared: 0.01261, Adjusted R-squared: 0.0126
## F-statistic: 2859 on 3 and 671599 DF, p-value: < 2.2e-16
cor.test(controls$AQ_full, controls$EQ_full)
##
## Pearson's product-moment correlation
##
## data: controls$AQ_full and controls$EQ_full
## t = -586.6, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5944322 -0.5912418
## sample estimates:
## cor
## -0.5928393
cor.test(controls$AQ_full, controls$SQ_full)
##
## Pearson's product-moment correlation
##
## data: controls$AQ_full and controls$SQ_full
## t = 363.1, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4126139 0.4166874
## sample estimates:
## cor
## 0.4146527
cor.test(controls$AQ_full, controls$SPQ_full)
##
## Pearson's product-moment correlation
##
## data: controls$AQ_full and controls$SPQ_full
## t = 288.44, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3381928 0.3425422
## sample estimates:
## cor
## 0.3403694
cor.test(controls$EQ_full, controls$SQ_full)
##
## Pearson's product-moment correlation
##
## data: controls$EQ_full and controls$SQ_full
## t = -176.6, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2187171 -0.2140280
## sample estimates:
## cor
## -0.2163738
cor.test(controls$EQ_full, controls$SPQ_full)
##
## Pearson's product-moment correlation
##
## data: controls$EQ_full and controls$SPQ_full
## t = -126.12, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1587309 -0.1539317
## sample estimates:
## cor
## -0.1563322
cor.test(controls$SQ_full, controls$SPQ_full)
##
## Pearson's product-moment correlation
##
## data: controls$SQ_full and controls$SPQ_full
## t = 425.13, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4687968 0.4726262
## sample estimates:
## cor
## 0.4707137
#Age differences
cor.test(controls$AQ_full, controls$age)
##
## Pearson's product-moment correlation
##
## data: controls$AQ_full and controls$age
## t = -32.633, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0433738 -0.0384627
## sample estimates:
## cor
## -0.0409185
ggplot(controls, aes(AQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(controls$EQ_full, controls$age)
##
## Pearson's product-moment correlation
##
## data: controls$EQ_full and controls$age
## t = 44.151, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05287075 0.05777503
## sample estimates:
## cor
## 0.05532322
ggplot(controls, aes(EQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(controls$SQ_full, controls$age)
##
## Pearson's product-moment correlation
##
## data: controls$SQ_full and controls$age
## t = 30.478, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03576464 0.04067679
## sample estimates:
## cor
## 0.03822095
ggplot(controls, aes(SQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(controls$SPQ_full, controls$age)
##
## Pearson's product-moment correlation
##
## data: controls$SPQ_full and controls$age
## t = 80.472, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09804299 0.10291266
## sample estimates:
## cor
## 0.1004784
ggplot(controls, aes(SPQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
#Age differences
cor.test(cases$AQ_full, cases$age)
##
## Pearson's product-moment correlation
##
## data: cases$AQ_full and cases$age
## t = 22.515, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1066959 0.1268930
## sample estimates:
## cor
## 0.1168065
ggplot(cases, aes(AQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(cases$EQ_full, cases$age)
##
## Pearson's product-moment correlation
##
## data: cases$EQ_full and cases$age
## t = -9.8091, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06137990 -0.04095703
## sample estimates:
## cor
## -0.05117382
ggplot(cases, aes(EQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(cases$SQ_full, cases$age)
##
## Pearson's product-moment correlation
##
## data: cases$SQ_full and cases$age
## t = 26.126, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1251606 0.1452627
## sample estimates:
## cor
## 0.1352256
ggplot(cases, aes(SQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(cases$SPQ_full, cases$age)
##
## Pearson's product-moment correlation
##
## data: cases$SPQ_full and cases$age
## t = 28.092, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1351556 0.1552004
## sample estimates:
## cor
## 0.1451929
ggplot(cases, aes(SPQ_full, age)) + geom_smooth(method = "lm") + theme_minimal()
#Correlation with education
cor.test(controls$AQ_full, controls$education)
##
## Pearson's product-moment correlation
##
## data: controls$AQ_full and controls$education
## t = -72.145, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09260927 -0.08772992
## sample estimates:
## cor
## -0.09017014
ggplot(controls, aes(education, AQ_full)) + geom_smooth(method = "lm") + theme_minimal()
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
cor.test(controls$EQ_full, controls$education)
##
## Pearson's product-moment correlation
##
## data: controls$EQ_full and controls$education
## t = 79.747, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09714665 0.10201721
## sample estimates:
## cor
## 0.09958253
ggplot(controls, aes(education, EQ_full)) + geom_smooth(method = "lm") + theme_minimal()
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
cor.test(controls$SQ_full, controls$education)
##
## Pearson's product-moment correlation
##
## data: controls$SQ_full and controls$education
## t = 10.739, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.01101702 0.01593548
## sample estimates:
## cor
## 0.01347633
ggplot(controls, aes(education, SQ_full)) + geom_smooth(method = "lm") + theme_minimal()
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
cor.test(controls$SPQ_full, controls$education)
##
## Pearson's product-moment correlation
##
## data: controls$SPQ_full and controls$education
## t = -60.811, df = 634950, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07853897 -0.07364810
## sample estimates:
## cor
## -0.07609399
ggplot(controls, aes(education, SPQ_full)) + geom_smooth(method = "lm") + theme_minimal()
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
#Correlation with education
cor.test(cases$AQ_full, cases$education)
##
## Pearson's product-moment correlation
##
## data: cases$AQ_full and cases$education
## t = -9.6043, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06031546 -0.03989038
## sample estimates:
## cor
## -0.05010816
ggplot(cases, aes(education, AQ_full)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(cases$EQ_full, cases$education)
##
## Pearson's product-moment correlation
##
## data: cases$EQ_full and cases$education
## t = 16.144, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.07385792 0.09418983
## sample estimates:
## cor
## 0.08403262
ggplot(cases, aes(education, EQ_full)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(cases$SQ_full, cases$education)
##
## Pearson's product-moment correlation
##
## data: cases$SQ_full and cases$education
## t = 4.2136, df = 36646, p-value = 2.519e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.01177013 0.03223670
## sample estimates:
## cor
## 0.02200572
ggplot(cases, aes(education, SQ_full)) + geom_smooth(method = "lm") + theme_minimal()
cor.test(cases$SPQ_full, cases$education)
##
## Pearson's product-moment correlation
##
## data: cases$SPQ_full and cases$education
## t = -11.977, df = 36646, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07263359 -0.05223692
## sample estimates:
## cor
## -0.06244177
ggplot(cases, aes(education, SPQ_full)) + geom_smooth(method = "lm") + theme_minimal()
m = aov(data=controls, AQ_full~as.character(countryregion))
anova(m)
## Analysis of Variance Table
##
## Response: AQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(countryregion) 13 3485 268.06 52.762 < 2.2e-16 ***
## Residuals 634938 3225759 5.08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = aov(data=controls, EQ_full~as.character(countryregion))
anova(m)
## Analysis of Variance Table
##
## Response: EQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(countryregion) 13 19225 1478.85 61.581 < 2.2e-16 ***
## Residuals 634938 15247838 24.01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = aov(data=controls, SQ_full~as.character(countryregion))
anova(m)
## Analysis of Variance Table
##
## Response: SQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(countryregion) 13 90279 6944.5 428.45 < 2.2e-16 ***
## Residuals 634938 10291501 16.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = aov(data=controls, SPQ_full~as.character(countryregion))
anova(m)
## Analysis of Variance Table
##
## Response: SPQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(countryregion) 13 28654 2204.12 68.538 < 2.2e-16 ***
## Residuals 634938 20419033 32.16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
controls2 = controls[,c ("sex", "countryregion", "AQ_full", "EQ_full", "SQ_full", "SPQ_full" )]
controls2_melt = melt(controls2, id.vars=c("sex", "countryregion"))
head(controls2_melt)
## sex countryregion variable value
## 1 1 1 AQ_full 6
## 2 1 2 AQ_full 3
## 3 2 4 AQ_full 2
## 4 1 9 AQ_full 5
## 5 2 4 AQ_full 1
## 6 2 4 AQ_full 0
alpha = ddply(controls2_melt, c("sex", "countryregion", "variable"), summarise,
mean = mean(value), sd = sd(value),
sem = sd(value)/sqrt(length(value)))
alpha
## sex countryregion variable mean sd sem
## 1 1 0 AQ_full 3.537683 2.208153 0.020446734
## 2 1 0 EQ_full 8.940067 4.655561 0.043108896
## 3 1 0 SQ_full 7.012004 4.237871 0.039241232
## 4 1 0 SPQ_full 13.874904 5.699700 0.052777262
## 5 1 1 AQ_full 3.680491 2.305856 0.025541909
## 6 1 1 EQ_full 8.698896 4.654897 0.051562183
## 7 1 1 SQ_full 6.808712 4.195884 0.046477709
## 8 1 1 SPQ_full 14.383926 5.603363 0.062068318
## 9 1 2 AQ_full 3.514864 2.288303 0.017859935
## 10 1 2 EQ_full 8.884077 4.708515 0.036749402
## 11 1 2 SQ_full 6.534539 4.089856 0.031920842
## 12 1 2 SPQ_full 13.979593 5.421161 0.042311518
## 13 1 3 AQ_full 3.423891 2.170407 0.026308443
## 14 1 3 EQ_full 9.204672 4.650771 0.056374014
## 15 1 3 SQ_full 6.345137 4.009179 0.048597001
## 16 1 3 SPQ_full 14.071701 5.553679 0.067318559
## 17 1 4 AQ_full 3.392804 2.210797 0.014564272
## 18 1 4 EQ_full 9.366288 4.791003 0.031562137
## 19 1 4 SQ_full 6.472615 4.069815 0.026811099
## 20 1 4 SPQ_full 13.724546 5.402761 0.035592266
## 21 1 5 AQ_full 3.666031 2.307108 0.023115040
## 22 1 5 EQ_full 8.626079 4.732607 0.047416242
## 23 1 5 SQ_full 6.559526 4.118772 0.041266198
## 24 1 5 SPQ_full 14.408653 5.580591 0.055912244
## 25 1 6 AQ_full 3.655633 2.301357 0.017208363
## 26 1 6 EQ_full 8.706682 4.762788 0.035613671
## 27 1 6 SQ_full 6.633156 4.189445 0.031326511
## 28 1 6 SPQ_full 14.344031 5.543560 0.041451882
## 29 1 7 AQ_full 3.645297 2.298762 0.020276442
## 30 1 7 EQ_full 8.540885 4.734631 0.041762245
## 31 1 7 SQ_full 6.562281 4.126382 0.036397135
## 32 1 7 SPQ_full 14.144402 5.431490 0.047908963
## 33 1 8 AQ_full 3.757121 2.318135 0.022113608
## 34 1 8 EQ_full 8.634635 4.773139 0.045532870
## 35 1 8 SQ_full 6.702339 4.135881 0.039453809
## 36 1 8 SPQ_full 14.356993 5.457821 0.052064325
## 37 1 9 AQ_full 3.781260 2.357438 0.021552723
## 38 1 9 EQ_full 8.419592 4.775541 0.043660062
## 39 1 9 SQ_full 6.681377 4.109097 0.037567146
## 40 1 9 SPQ_full 14.335172 5.453089 0.049854505
## 41 1 10 AQ_full 3.646410 2.326516 0.013529229
## 42 1 10 EQ_full 8.748233 4.811933 0.027982506
## 43 1 10 SQ_full 6.580332 4.158461 0.024182410
## 44 1 10 SPQ_full 14.119847 5.457430 0.031736216
## 45 1 11 AQ_full 3.661345 2.315339 0.017390383
## 46 1 11 EQ_full 8.786698 4.781361 0.035912541
## 47 1 11 SQ_full 6.708338 4.167996 0.031305593
## 48 1 11 SPQ_full 14.214939 5.508773 0.041376094
## 49 1 12 AQ_full 3.463636 2.236946 0.009028093
## 50 1 12 EQ_full 9.045119 4.740131 0.019130702
## 51 1 12 SQ_full 7.057173 4.275507 0.017255526
## 52 1 12 SPQ_full 13.594905 5.543951 0.022374841
## 53 1 13 AQ_full 3.630198 2.330498 0.043024773
## 54 1 13 EQ_full 8.811861 4.757686 0.087834621
## 55 1 13 SQ_full 6.767212 4.264683 0.078732987
## 56 1 13 SPQ_full 14.244717 5.680921 0.104879030
## 57 1 NA AQ_full 1.000000 NA NA
## 58 1 NA EQ_full 6.000000 NA NA
## 59 1 NA SQ_full 2.000000 NA NA
## 60 1 NA SPQ_full 8.000000 NA NA
## 61 2 0 AQ_full 3.354133 2.194253 0.018280996
## 62 2 0 EQ_full 10.497744 4.670793 0.038913815
## 63 2 0 SQ_full 6.152079 4.007419 0.033387043
## 64 2 0 SPQ_full 15.302700 5.876033 0.048955042
## 65 2 1 AQ_full 3.257416 2.234546 0.017905612
## 66 2 1 EQ_full 10.636317 4.780967 0.038310309
## 67 2 1 SQ_full 5.470720 3.861702 0.030944159
## 68 2 1 SPQ_full 15.017337 5.757003 0.046131375
## 69 2 2 AQ_full 3.099208 2.224139 0.012747717
## 70 2 2 EQ_full 10.895634 4.793870 0.027476208
## 71 2 2 SQ_full 5.238921 3.774540 0.021633888
## 72 2 2 SPQ_full 14.558654 5.720728 0.032788520
## 73 2 3 AQ_full 3.232320 2.171664 0.020651626
## 74 2 3 EQ_full 10.645325 4.723033 0.044914100
## 75 2 3 SQ_full 5.310816 3.790106 0.036042346
## 76 2 3 SPQ_full 14.963013 5.836393 0.055501689
## 77 2 4 AQ_full 2.967371 2.155629 0.012120991
## 78 2 4 EQ_full 11.279784 4.846684 0.027252657
## 79 2 4 SQ_full 5.180505 3.716554 0.020897991
## 80 2 4 SPQ_full 14.335715 5.665844 0.031858753
## 81 2 5 AQ_full 3.229662 2.201800 0.016474985
## 82 2 5 EQ_full 10.626617 4.800155 0.035917189
## 83 2 5 SQ_full 5.234645 3.781857 0.028297769
## 84 2 5 SPQ_full 14.983875 5.707441 0.042705964
## 85 2 6 AQ_full 3.197895 2.220610 0.012427967
## 86 2 6 EQ_full 10.731128 4.852364 0.027156957
## 87 2 6 SQ_full 5.212679 3.782165 0.021167435
## 88 2 6 SPQ_full 14.926831 5.695904 0.031877952
## 89 2 7 AQ_full 3.226686 2.235648 0.014662589
## 90 2 7 EQ_full 10.560392 4.851843 0.031821008
## 91 2 7 SQ_full 5.201222 3.735724 0.024500899
## 92 2 7 SPQ_full 14.767636 5.629790 0.036923203
## 93 2 8 AQ_full 3.185358 2.254743 0.015971816
## 94 2 8 EQ_full 10.712078 4.871450 0.034507660
## 95 2 8 SQ_full 5.255357 3.785719 0.026816718
## 96 2 8 SPQ_full 14.990115 5.721196 0.040526967
## 97 2 9 AQ_full 3.204341 2.265085 0.015075461
## 98 2 9 EQ_full 10.669103 4.933105 0.032832693
## 99 2 9 SQ_full 5.197209 3.765532 0.025061814
## 100 2 9 SPQ_full 14.900775 5.689103 0.037864300
## 101 2 10 AQ_full 3.063272 2.236186 0.009521378
## 102 2 10 EQ_full 11.009065 4.921571 0.020955381
## 103 2 10 SQ_full 5.071557 3.745460 0.015947662
## 104 2 10 SPQ_full 14.766239 5.728368 0.024390615
## 105 2 11 AQ_full 3.137046 2.237841 0.012649832
## 106 2 11 EQ_full 10.919479 4.888902 0.027635473
## 107 2 11 SQ_full 5.221817 3.768285 0.021300968
## 108 2 11 SPQ_full 14.826400 5.714130 0.032300236
## 109 2 12 AQ_full 3.234543 2.234428 0.007760020
## 110 2 12 EQ_full 10.696575 4.825640 0.016759131
## 111 2 12 SQ_full 6.179809 4.126811 0.014332142
## 112 2 12 SPQ_full 14.852828 5.839262 0.020279371
## 113 2 13 AQ_full 3.310419 2.225342 0.029774621
## 114 2 13 EQ_full 10.463480 4.856860 0.064983799
## 115 2 13 SQ_full 5.538489 3.820133 0.051112604
## 116 2 13 SPQ_full 15.204977 5.802020 0.077629854
## 117 2 NA AQ_full 2.500000 2.121320 1.500000000
## 118 2 NA EQ_full 8.000000 4.242641 3.000000000
## 119 2 NA SQ_full 4.500000 2.121320 1.500000000
## 120 2 NA SPQ_full 12.500000 4.949747 3.500000000
#STEM differences
t.test(controls$AQ_full ~ controls$STEM)
##
## Welch Two Sample t-test
##
## data: controls$AQ_full by controls$STEM
## t = -54.648, df = 89225, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.5285422 -0.4919420
## sample estimates:
## mean in group 0 mean in group 1
## 3.263799 3.774041
ggplot(controls, aes(x=AQ_full, colour=as.character(STEM))) + geom_density() + theme_minimal()
t.test(controls$EQ_full ~ controls$STEM)
##
## Welch Two Sample t-test
##
## data: controls$EQ_full by controls$STEM
## t = 83.145, df = 91180, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.578917 1.655154
## sample estimates:
## mean in group 0 mean in group 1
## 10.250123 8.633087
ggplot(controls, aes(x=EQ_full, colour=as.character(STEM))) + geom_density() + theme_minimal()
t.test(controls$SQ_full ~ controls$STEM)
##
## Welch Two Sample t-test
##
## data: controls$SQ_full by controls$STEM
## t = -102.73, df = 88314, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.776259 -1.709749
## sample estimates:
## mean in group 0 mean in group 1
## 5.740506 7.483511
ggplot(controls, aes(x=SQ_full, colour=as.character(STEM))) + geom_density() + theme_minimal()
t.test(controls$SPQ_full ~ controls$STEM)
##
## Welch Two Sample t-test
##
## data: controls$SPQ_full by controls$STEM
## t = 8.2666, df = 92725, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1386827 0.2248830
## sample estimates:
## mean in group 0 mean in group 1
## 14.52819 14.34641
ggplot(controls, aes(x=SPQ_full, colour=as.character(STEM))) + geom_density() + theme_minimal()
controls2 = controls[,c ("sex", "STEM", "AQ_full", "EQ_full", "SQ_full", "SPQ_full" )]
controls2 = controls2[!is.na(controls2$AQ_full),]
controls2_melt = melt(controls2, id.vars=c("sex", "STEM"))
head(controls2_melt)
## sex STEM variable value
## 1 1 0 AQ_full 6
## 2 1 0 AQ_full 3
## 3 2 0 AQ_full 2
## 4 1 0 AQ_full 5
## 5 2 0 AQ_full 1
## 6 2 0 AQ_full 0
alpha = ddply(controls2_melt, c("sex", "STEM", "variable"), summarise,
mean = mean(value), sd = sd(value),
sem = sd(value)/sqrt(length(value)))
alpha
## sex STEM variable mean sd sem
## 1 1 0 AQ_full 3.489074 2.247115 0.005162604
## 2 1 0 EQ_full 9.069335 4.729977 0.010866822
## 3 1 0 SQ_full 6.467117 4.096856 0.009412267
## 4 1 0 SPQ_full 13.952211 5.522411 0.012687390
## 5 1 1 AQ_full 3.867619 2.367948 0.010395341
## 6 1 1 EQ_full 8.167322 4.784522 0.021004151
## 7 1 1 SQ_full 7.711205 4.333073 0.019022278
## 8 1 1 SPQ_full 14.149746 5.489321 0.024098230
## 9 1 NA AQ_full 2.777778 1.986063 0.662020849
## 10 1 NA EQ_full 9.555556 4.746344 1.582114541
## 11 1 NA SQ_full 3.777778 2.488864 0.829621362
## 12 1 NA SPQ_full 10.555556 4.639804 1.546601212
## 13 2 0 AQ_full 3.149478 2.216301 0.003627252
## 14 2 0 EQ_full 10.849338 4.831171 0.007906810
## 15 2 0 SQ_full 5.371773 3.839555 0.006283908
## 16 2 0 SPQ_full 14.820487 5.752781 0.009415139
## 17 2 1 AQ_full 3.534249 2.383149 0.016747479
## 18 2 1 EQ_full 9.826609 5.072695 0.035648145
## 19 2 1 SQ_full 6.900044 4.269444 0.030003338
## 20 2 1 SPQ_full 14.850363 5.649177 0.039699353
## 21 2 NA AQ_full 2.923077 1.497862 0.415432096
## 22 2 NA EQ_full 10.769231 3.940259 1.092831222
## 23 2 NA SQ_full 4.307692 2.393903 0.663949096
## 24 2 NA SPQ_full 13.000000 6.218253 1.724632997
AQ analysis
summary(lm(AQ_full ~ STEM + as.character(countryregion) + as.character(handedness) + as.character(sex) + education + age, data = controls))
##
## Call:
## lm(formula = AQ_full ~ STEM + as.character(countryregion) + as.character(handedness) +
## as.character(sex) + education + age, data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0911 -1.7073 -0.3292 1.4041 7.4519
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1579717 0.1045071 49.355 < 2e-16 ***
## STEM 0.4564204 0.0091719 49.763 < 2e-16 ***
## as.character(countryregion)1 0.0698384 0.0200280 3.487 0.000488 ***
## as.character(countryregion)10 -0.0667375 0.0158391 -4.213 2.52e-05 ***
## as.character(countryregion)11 -0.0265650 0.0171152 -1.552 0.120633
## as.character(countryregion)12 -0.0267816 0.0150413 -1.781 0.074987 .
## as.character(countryregion)13 0.0430112 0.0278289 1.546 0.122212
## as.character(countryregion)2 -0.0929391 0.0172554 -5.386 7.20e-08 ***
## as.character(countryregion)3 -0.0644923 0.0216668 -2.977 0.002915 **
## as.character(countryregion)4 -0.1702927 0.0168302 -10.118 < 2e-16 ***
## as.character(countryregion)5 0.0249753 0.0192328 1.299 0.194089
## as.character(countryregion)6 0.0292916 0.0170738 1.716 0.086238 .
## as.character(countryregion)7 0.0355810 0.0181426 1.961 0.049858 *
## as.character(countryregion)8 0.0501540 0.0187756 2.671 0.007557 **
## as.character(countryregion)9 0.0550016 0.0183204 3.002 0.002680 **
## as.character(handedness)1 -1.0221598 0.1036819 -9.859 < 2e-16 ***
## as.character(handedness)2 -0.9591073 0.1039606 -9.226 < 2e-16 ***
## as.character(handedness)3 -0.3636422 0.1046563 -3.475 0.000512 ***
## as.character(sex)2 -0.2917310 0.0060607 -48.135 < 2e-16 ***
## education -0.2218523 0.0031463 -70.512 < 2e-16 ***
## age -0.0041805 0.0002339 -17.876 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.229 on 634902 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.02316, Adjusted R-squared: 0.02313
## F-statistic: 752.6 on 20 and 634902 DF, p-value: < 2.2e-16
summary(lm(EQ_full ~ STEM + as.character(countryregion) + as.character(handedness) + as.character(sex) + education + age, data = data2))
##
## Call:
## lm(formula = EQ_full ~ STEM + as.character(countryregion) + as.character(handedness) +
## as.character(sex) + education + age, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.2265 -3.5727 0.1148 3.6218 13.7252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7854211 0.2103470 27.504 < 2e-16 ***
## STEM -1.1061495 0.0192079 -57.588 < 2e-16 ***
## as.character(countryregion)1 -0.1636512 0.0415229 -3.941 8.11e-05 ***
## as.character(countryregion)10 0.1065760 0.0326606 3.263 0.001102 **
## as.character(countryregion)11 0.0912821 0.0353718 2.581 0.009862 **
## as.character(countryregion)12 -0.0039324 0.0308816 -0.127 0.898672
## as.character(countryregion)13 -0.1479070 0.0572900 -2.582 0.009831 **
## as.character(countryregion)2 0.1027536 0.0356874 2.879 0.003986 **
## as.character(countryregion)3 0.1274049 0.0448925 2.838 0.004540 **
## as.character(countryregion)4 0.4138088 0.0347594 11.905 < 2e-16 ***
## as.character(countryregion)5 -0.1251053 0.0398486 -3.140 0.001692 **
## as.character(countryregion)6 -0.1058326 0.0352688 -3.001 0.002693 **
## as.character(countryregion)7 -0.2394007 0.0375717 -6.372 1.87e-10 ***
## as.character(countryregion)8 -0.1368748 0.0388759 -3.521 0.000430 ***
## as.character(countryregion)9 -0.1880976 0.0379629 -4.955 7.24e-07 ***
## as.character(handedness)1 1.6051073 0.2088018 7.687 1.51e-14 ***
## as.character(handedness)2 1.4115858 0.2094017 6.741 1.57e-11 ***
## as.character(handedness)3 0.8017991 0.2108465 3.803 0.000143 ***
## as.character(sex)2 1.6823399 0.0126524 132.966 < 2e-16 ***
## education 0.5519083 0.0065539 84.210 < 2e-16 ***
## age 0.0093372 0.0004933 18.930 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.802 on 671548 degrees of freedom
## (34 observations deleted due to missingness)
## Multiple R-squared: 0.05392, Adjusted R-squared: 0.0539
## F-statistic: 1914 on 20 and 671548 DF, p-value: < 2.2e-16
summary(lm(SQ_full ~ STEM + as.character(countryregion) + as.character(handedness) + as.character(sex) + education + age, data = controls))
##
## Call:
## lm(formula = SQ_full ~ STEM + as.character(countryregion) + as.character(handedness) +
## as.character(sex) + education + age, data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0797 -2.9682 -0.7138 2.2849 15.3283
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.2518765 0.1844047 44.749 <2e-16 ***
## STEM 1.2719874 0.0161840 78.596 <2e-16 ***
## as.character(countryregion)1 -0.5090883 0.0353398 -14.406 <2e-16 ***
## as.character(countryregion)10 -0.8891772 0.0279483 -31.815 <2e-16 ***
## as.character(countryregion)11 -0.7249772 0.0302001 -24.006 <2e-16 ***
## as.character(countryregion)12 -0.0110512 0.0265406 -0.416 0.677
## as.character(countryregion)13 -0.4893560 0.0491047 -9.966 <2e-16 ***
## as.character(countryregion)2 -0.7509075 0.0304476 -24.662 <2e-16 ***
## as.character(countryregion)3 -0.6757224 0.0382315 -17.675 <2e-16 ***
## as.character(countryregion)4 -0.7651689 0.0296972 -25.766 <2e-16 ***
## as.character(countryregion)5 -0.7406650 0.0339366 -21.825 <2e-16 ***
## as.character(countryregion)6 -0.7354979 0.0301270 -24.413 <2e-16 ***
## as.character(countryregion)7 -0.7471001 0.0320129 -23.337 <2e-16 ***
## as.character(countryregion)8 -0.7088563 0.0331298 -21.396 <2e-16 ***
## as.character(countryregion)9 -0.7683960 0.0323266 -23.770 <2e-16 ***
## as.character(handedness)1 -1.9208224 0.1829486 -10.499 <2e-16 ***
## as.character(handedness)2 -1.8664223 0.1834404 -10.175 <2e-16 ***
## as.character(handedness)3 0.1132400 0.1846680 0.613 0.540
## as.character(sex)2 -1.1133900 0.0106942 -104.112 <2e-16 ***
## education 0.0084179 0.0055517 1.516 0.129
## age 0.0204007 0.0004127 49.438 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.933 on 634902 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.05396, Adjusted R-squared: 0.05393
## F-statistic: 1811 on 20 and 634902 DF, p-value: < 2.2e-16
summary(lm(SPQ_full ~ STEM + as.character(countryregion) + as.character(handedness) + as.character(sex) + education + age, data = controls))
##
## Call:
## lm(formula = SPQ_full ~ STEM + as.character(countryregion) +
## as.character(handedness) + as.character(sex) + education +
## age, data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.4199 -3.9949 -0.1624 3.8848 17.4793
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.5666565 0.2619189 59.433 < 2e-16 ***
## STEM 0.2481976 0.0229869 10.797 < 2e-16 ***
## as.character(countryregion)1 0.0958396 0.0501949 1.909 0.05622 .
## as.character(countryregion)10 -0.2595867 0.0396964 -6.539 6.19e-11 ***
## as.character(countryregion)11 -0.1330225 0.0428947 -3.101 0.00193 **
## as.character(countryregion)12 -0.2328654 0.0376969 -6.177 6.52e-10 ***
## as.character(countryregion)13 0.0461472 0.0697457 0.662 0.50820
## as.character(countryregion)2 -0.2797712 0.0432461 -6.469 9.85e-11 ***
## as.character(countryregion)3 0.1082538 0.0543020 1.994 0.04620 *
## as.character(countryregion)4 -0.3919889 0.0421804 -9.293 < 2e-16 ***
## as.character(countryregion)5 0.1045011 0.0482018 2.168 0.03016 *
## as.character(countryregion)6 0.0680403 0.0427908 1.590 0.11182
## as.character(countryregion)7 -0.0960455 0.0454695 -2.112 0.03466 *
## as.character(countryregion)8 0.0372836 0.0470559 0.792 0.42817
## as.character(countryregion)9 -0.0720362 0.0459151 -1.569 0.11667
## as.character(handedness)1 -1.4155590 0.2598507 -5.448 5.11e-08 ***
## as.character(handedness)2 -1.4133260 0.2605493 -5.424 5.82e-08 ***
## as.character(handedness)3 1.4375280 0.2622929 5.481 4.24e-08 ***
## as.character(sex)2 0.7253028 0.0151895 47.750 < 2e-16 ***
## education -0.5520497 0.0078853 -70.010 < 2e-16 ***
## age 0.0450394 0.0005861 76.845 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.586 on 634902 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.03098, Adjusted R-squared: 0.03094
## F-statistic: 1015 on 20 and 634902 DF, p-value: < 2.2e-16
summary(lm(AQ_full ~ as.character(handedness) + as.character(sex) + as.character(countryregion) + education + age + as.character(STEM), data = controls ))
##
## Call:
## lm(formula = AQ_full ~ as.character(handedness) + as.character(sex) +
## as.character(countryregion) + education + age + as.character(STEM),
## data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0911 -1.7073 -0.3292 1.4041 7.4519
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1579717 0.1045071 49.355 < 2e-16 ***
## as.character(handedness)1 -1.0221598 0.1036819 -9.859 < 2e-16 ***
## as.character(handedness)2 -0.9591073 0.1039606 -9.226 < 2e-16 ***
## as.character(handedness)3 -0.3636422 0.1046563 -3.475 0.000512 ***
## as.character(sex)2 -0.2917310 0.0060607 -48.135 < 2e-16 ***
## as.character(countryregion)1 0.0698384 0.0200280 3.487 0.000488 ***
## as.character(countryregion)10 -0.0667375 0.0158391 -4.213 2.52e-05 ***
## as.character(countryregion)11 -0.0265650 0.0171152 -1.552 0.120633
## as.character(countryregion)12 -0.0267816 0.0150413 -1.781 0.074987 .
## as.character(countryregion)13 0.0430112 0.0278289 1.546 0.122212
## as.character(countryregion)2 -0.0929391 0.0172554 -5.386 7.20e-08 ***
## as.character(countryregion)3 -0.0644923 0.0216668 -2.977 0.002915 **
## as.character(countryregion)4 -0.1702927 0.0168302 -10.118 < 2e-16 ***
## as.character(countryregion)5 0.0249753 0.0192328 1.299 0.194089
## as.character(countryregion)6 0.0292916 0.0170738 1.716 0.086238 .
## as.character(countryregion)7 0.0355810 0.0181426 1.961 0.049858 *
## as.character(countryregion)8 0.0501540 0.0187756 2.671 0.007557 **
## as.character(countryregion)9 0.0550016 0.0183204 3.002 0.002680 **
## education -0.2218523 0.0031463 -70.512 < 2e-16 ***
## age -0.0041805 0.0002339 -17.876 < 2e-16 ***
## as.character(STEM)1 0.4564204 0.0091719 49.763 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.229 on 634902 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.02316, Adjusted R-squared: 0.02313
## F-statistic: 752.6 on 20 and 634902 DF, p-value: < 2.2e-16
summary(lm(AQ_full ~ as.character(handedness) + as.character(sex) + as.character(countryregion) + education + age + as.character(STEM) + wheelwrightD, data = controls ))
##
## Call:
## lm(formula = AQ_full ~ as.character(handedness) + as.character(sex) +
## as.character(countryregion) + education + age + as.character(STEM) +
## wheelwrightD, data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3327 -1.1837 -0.1153 1.0918 8.0603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8220984 0.0793073 48.194 < 2e-16 ***
## as.character(handedness)1 -0.2916013 0.0786645 -3.707 0.00021 ***
## as.character(handedness)2 -0.2797688 0.0788750 -3.547 0.00039 ***
## as.character(handedness)3 -0.2139561 0.0793968 -2.695 0.00704 **
## as.character(sex)2 0.3079321 0.0046807 65.788 < 2e-16 ***
## as.character(countryregion)1 0.1453879 0.0151945 9.568 < 2e-16 ***
## as.character(countryregion)10 0.1473700 0.0120202 12.260 < 2e-16 ***
## as.character(countryregion)11 0.1480570 0.0129868 11.401 < 2e-16 ***
## as.character(countryregion)12 -0.0241577 0.0114109 -2.117 0.03425 *
## as.character(countryregion)13 0.1186093 0.0211124 5.618 1.93e-08 ***
## as.character(countryregion)2 0.0872924 0.0130933 6.667 2.61e-11 ***
## as.character(countryregion)3 0.1017048 0.0164391 6.187 6.14e-10 ***
## as.character(countryregion)4 0.0783283 0.0127733 6.132 8.67e-10 ***
## as.character(countryregion)5 0.1547891 0.0145920 10.608 < 2e-16 ***
## as.character(countryregion)6 0.1648974 0.0129544 12.729 < 2e-16 ***
## as.character(countryregion)7 0.1413032 0.0137646 10.266 < 2e-16 ***
## as.character(countryregion)8 0.1751611 0.0142451 12.296 < 2e-16 ***
## as.character(countryregion)9 0.1785001 0.0138997 12.842 < 2e-16 ***
## education -0.1120334 0.0023923 -46.831 < 2e-16 ***
## age -0.0069696 0.0001775 -39.274 < 2e-16 ***
## as.character(STEM)1 -0.0580504 0.0069987 -8.295 < 2e-16 ***
## wheelwrightD 4.3095952 0.0062979 684.288 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.691 on 634901 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.4378, Adjusted R-squared: 0.4378
## F-statistic: 2.354e+04 on 21 and 634901 DF, p-value: < 2.2e-16
summary(lm(AQ_full ~ as.character(handedness) + as.character(sex) + as.character(countryregion) + education + age + as.character(STEM) + wheelwrightD + SPQ_full, data = controls ))
##
## Call:
## lm(formula = AQ_full ~ as.character(handedness) + as.character(sex) +
## as.character(countryregion) + education + age + as.character(STEM) +
## wheelwrightD + SPQ_full, data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9192 -1.1747 -0.1214 1.0767 7.6657
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.2974485 0.0789539 41.764 < 2e-16 ***
## as.character(handedness)1 -0.2805772 0.0781194 -3.592 0.000329 ***
## as.character(handedness)2 -0.2657465 0.0783284 -3.393 0.000692 ***
## as.character(handedness)3 -0.2788516 0.0788495 -3.537 0.000405 ***
## as.character(sex)2 0.2436201 0.0046980 51.857 < 2e-16 ***
## as.character(countryregion)1 0.1371120 0.0150894 9.087 < 2e-16 ***
## as.character(countryregion)10 0.1445650 0.0119370 12.111 < 2e-16 ***
## as.character(countryregion)11 0.1427103 0.0128969 11.065 < 2e-16 ***
## as.character(countryregion)12 -0.0152637 0.0113322 -1.347 0.178002
## as.character(countryregion)13 0.1122622 0.0209662 5.354 8.59e-08 ***
## as.character(countryregion)2 0.0873123 0.0130026 6.715 1.88e-11 ***
## as.character(countryregion)3 0.0874866 0.0163259 5.359 8.38e-08 ***
## as.character(countryregion)4 0.0785912 0.0126847 6.196 5.80e-10 ***
## as.character(countryregion)5 0.1429082 0.0144914 9.862 < 2e-16 ***
## as.character(countryregion)6 0.1540849 0.0128651 11.977 < 2e-16 ***
## as.character(countryregion)7 0.1386690 0.0136692 10.145 < 2e-16 ***
## as.character(countryregion)8 0.1661825 0.0141467 11.747 < 2e-16 ***
## as.character(countryregion)9 0.1738619 0.0138035 12.596 < 2e-16 ***
## education -0.0971885 0.0023809 -40.820 < 2e-16 ***
## age -0.0085524 0.0001770 -48.310 < 2e-16 ***
## as.character(STEM)1 -0.0367117 0.0069538 -5.279 1.30e-07 ***
## wheelwrightD 4.0500268 0.0068331 592.708 < 2e-16 ***
## SPQ_full 0.0388722 0.0004122 94.310 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.679 on 634900 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.4456, Adjusted R-squared: 0.4455
## F-statistic: 2.319e+04 on 22 and 634900 DF, p-value: < 2.2e-16
summary(lm(AQ_full ~ as.character(handedness) + as.character(sex) + as.character(countryregion) + education + age + as.character(STEM) + SQ_full + EQ_full + SPQ_full, data = controls ))
##
## Call:
## lm(formula = AQ_full ~ as.character(handedness) + as.character(sex) +
## as.character(countryregion) + education + age + as.character(STEM) +
## SQ_full + EQ_full + SPQ_full, data = controls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7792 -1.1605 -0.1011 1.0740 7.4496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6370193 0.0780697 59.396 < 2e-16 ***
## as.character(handedness)1 -0.3269993 0.0771272 -4.240 2.24e-05 ***
## as.character(handedness)2 -0.3154953 0.0773337 -4.080 4.51e-05 ***
## as.character(handedness)3 -0.2658667 0.0778472 -3.415 0.000637 ***
## as.character(sex)2 0.2208153 0.0046416 47.573 < 2e-16 ***
## as.character(countryregion)1 0.0963052 0.0149010 6.463 1.03e-10 ***
## as.character(countryregion)10 0.0948195 0.0117916 8.041 8.91e-16 ***
## as.character(countryregion)11 0.1008164 0.0127372 7.915 2.47e-15 ***
## as.character(countryregion)12 -0.0119638 0.0111882 -1.069 0.284925
## as.character(countryregion)13 0.0743384 0.0207018 3.591 0.000330 ***
## as.character(countryregion)2 0.0462365 0.0128413 3.601 0.000317 ***
## as.character(countryregion)3 0.0450656 0.0161217 2.795 0.005185 **
## as.character(countryregion)4 0.0498160 0.0125255 3.977 6.97e-05 ***
## as.character(countryregion)5 0.0875827 0.0143137 6.119 9.43e-10 ***
## as.character(countryregion)6 0.1009163 0.0127083 7.941 2.01e-15 ***
## as.character(countryregion)7 0.0819372 0.0135027 6.068 1.29e-09 ***
## as.character(countryregion)8 0.1144383 0.0139726 8.190 2.61e-16 ***
## as.character(countryregion)9 0.1176131 0.0136351 8.626 < 2e-16 ***
## education -0.0678663 0.0023617 -28.736 < 2e-16 ***
## age -0.0077112 0.0001749 -44.089 < 2e-16 ***
## as.character(STEM)1 0.0004923 0.0068716 0.072 0.942885
## SQ_full 0.1371815 0.0006108 224.598 < 2e-16 ***
## EQ_full -0.2398276 0.0004455 -538.367 < 2e-16 ***
## SPQ_full 0.0559626 0.0004282 130.696 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.658 on 634899 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.4596, Adjusted R-squared: 0.4595
## F-statistic: 2.347e+04 on 23 and 634899 DF, p-value: < 2.2e-16
cases_STEM = nrow(subset(cases, STEM == "1"))
cases_nonSTEM = nrow(subset(cases, STEM == "0"))
controls_STEM = nrow(subset(controls, STEM == "1"))
controls_nonSTEM = nrow(subset(controls, STEM == "0"))
STEMdiff <- matrix(c(cases_STEM,controls_STEM,cases_nonSTEM,controls_nonSTEM), ncol=2)
colnames(STEMdiff) <- c('STEM', 'nonSTEM')
row.names(STEMdiff) <- c('Cases', 'Controls')
STEMdiff
## STEM nonSTEM
## Cases 4130 32516
## Controls 72137 562796
chisq.test(STEMdiff)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: STEMdiff
## X-squared = 0.27831, df = 1, p-value = 0.5978
summary(glm(STEM ~ as.character(autism) + as.character(sex) + education + age, data = data2))
##
## Call:
## glm(formula = STEM ~ as.character(autism) + as.character(sex) +
## education + age, data = data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.31325 -0.18744 -0.06259 -0.02206 1.05985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.989e-02 1.403e-03 71.193 <2e-16 ***
## as.character(autism)1 -3.750e-03 1.654e-03 -2.267 0.0234 *
## as.character(sex)2 -1.675e-01 7.762e-04 -215.845 <2e-16 ***
## education 3.764e-02 4.127e-04 91.212 <2e-16 ***
## age 7.219e-04 3.127e-05 23.085 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.09317407)
##
## Null deviance: 67606 on 671576 degrees of freedom
## Residual deviance: 62573 on 671572 degrees of freedom
## (26 observations deleted due to missingness)
## AIC: 312016
##
## Number of Fisher Scoring iterations: 2
males = subset(data2, sex == "1")
females = subset(data2, sex == "2")
t.test(males$AQ_full ~ males$autism)
##
## Welch Two Sample t-test
##
## data: males$AQ_full by males$autism
## t = -64.35, df = 20245, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.344959 -1.265447
## sample estimates:
## mean in group 0 mean in group 1
## 3.570429 4.875632
ggplot(males, aes(x=AQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(males$EQ_full ~ males$autism)
##
## Welch Two Sample t-test
##
## data: males$EQ_full by males$autism
## t = 53.934, df = 21079, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.883890 2.025981
## sample estimates:
## mean in group 0 mean in group 1
## 8.875432 6.920497
ggplot(males, aes(x=EQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(males$SQ_full ~ males$autism)
##
## Welch Two Sample t-test
##
## data: males$SQ_full by males$autism
## t = -37.886, df = 20472, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.412876 -1.273874
## sample estimates:
## mean in group 0 mean in group 1
## 6.734478 8.077854
ggplot(males, aes(x=SQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(males$SPQ_full ~ males$autism)
##
## Welch Two Sample t-test
##
## data: males$SPQ_full by males$autism
## t = -48.823, df = 20363, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.429779 -2.242213
## sample estimates:
## mean in group 0 mean in group 1
## 13.99455 16.33055
ggplot(males, aes(x=SPQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(females$AQ_full ~ females$autism)
##
## Welch Two Sample t-test
##
## data: females$AQ_full by females$autism
## t = -72.913, df = 19616, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.535003 -1.454634
## sample estimates:
## mean in group 0 mean in group 1
## 3.169266 4.664085
ggplot(females, aes(x=AQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(females$EQ_full ~ females$autism)
##
## Welch Two Sample t-test
##
## data: females$EQ_full by females$autism
## t = 66.896, df = 20100, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.457166 2.605505
## sample estimates:
## mean in group 0 mean in group 1
## 10.796720 8.265385
ggplot(females, aes(x=EQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(females$SQ_full ~ females$autism)
##
## Welch Two Sample t-test
##
## data: females$SQ_full by females$autism
## t = -50.195, df = 19845, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.708597 -1.580172
## sample estimates:
## mean in group 0 mean in group 1
## 5.450361 7.094745
ggplot(females, aes(x=SQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
t.test(females$SPQ_full ~ females$autism)
##
## Welch Two Sample t-test
##
## data: females$SPQ_full by females$autism
## t = -49.417, df = 19995, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.376616 -2.195274
## sample estimates:
## mean in group 0 mean in group 1
## 14.82196 17.10791
ggplot(females, aes(x=SPQ_full, colour=as.character(autism))) + geom_density() + theme_minimal()
data2melt = data2[,c ("sex", "autism", "AQ_full", "EQ_full", "SQ_full", "SPQ_full" )]
data2melt = data2melt[!is.na(data2melt$AQ_full),]
data2melt = melt(data2melt, id.vars=c("sex", "autism"))
head(data2melt)
## sex autism variable value
## 1 2 1 AQ_full 6
## 2 1 1 AQ_full 6
## 3 1 0 AQ_full 6
## 4 1 0 AQ_full 3
## 5 2 1 AQ_full 1
## 6 2 0 AQ_full 2
ddply(data2melt, c("sex", "autism", "variable"), summarise,
mean = mean(value), sd = sd(value),
sem = sd(value)/sqrt(length(value)))
## sex autism variable mean sd sem
## 1 1 0 AQ_full 3.570429 2.278934 0.004638778
## 2 1 0 EQ_full 8.875432 4.756196 0.009681254
## 3 1 0 SQ_full 6.734478 4.180116 0.008508640
## 4 1 0 SPQ_full 13.994552 5.515901 0.011227636
## 5 1 1 AQ_full 4.875632 2.662900 0.019745239
## 6 1 1 EQ_full 6.920497 4.710717 0.034929673
## 7 1 1 SQ_full 8.077854 4.642268 0.034422132
## 8 1 1 SPQ_full 16.330548 6.272512 0.046510289
## 9 2 0 AQ_full 3.169266 2.226789 0.003549372
## 10 2 0 EQ_full 10.796720 4.849119 0.007729213
## 11 2 0 SQ_full 5.450361 3.877523 0.006180545
## 12 2 0 SPQ_full 14.821964 5.747510 0.009161196
## 13 2 1 AQ_full 4.664085 2.743430 0.020191941
## 14 2 1 EQ_full 8.265385 5.032808 0.037042007
## 15 2 1 SQ_full 7.094745 4.371088 0.032171680
## 16 2 1 SPQ_full 17.107909 6.160577 0.045342508
summary(glm(as.numeric(as.character(autism)) ~ as.character(sex) + education +
AQ_full + AQ_full:education + AQ_full:as.character(sex) + as.character(sex):education +
as.character(countryregion) + as.character(handedness) +
age + age:education + AQ_full:age + age:as.character(sex) + STEM, data = data2))
##
## Call:
## glm(formula = as.numeric(as.character(autism)) ~ as.character(sex) +
## education + AQ_full + AQ_full:education + AQ_full:as.character(sex) +
## as.character(sex):education + as.character(countryregion) +
## as.character(handedness) + age + age:education + AQ_full:age +
## age:as.character(sex) + STEM, data = data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.36922 -0.07468 -0.04092 -0.01579 1.06536
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.692e-01 1.010e-02 16.759 < 2e-16 ***
## as.character(sex)2 -1.651e-02 2.259e-03 -7.307 2.74e-13 ***
## education -2.421e-02 1.000e-03 -24.201 < 2e-16 ***
## AQ_full 2.946e-02 4.376e-04 67.313 < 2e-16 ***
## as.character(countryregion)1 -3.285e-02 1.925e-03 -17.061 < 2e-16 ***
## as.character(countryregion)10 -3.844e-02 1.515e-03 -25.371 < 2e-16 ***
## as.character(countryregion)11 -3.745e-02 1.640e-03 -22.830 < 2e-16 ***
## as.character(countryregion)12 -2.134e-02 1.431e-03 -14.908 < 2e-16 ***
## as.character(countryregion)13 -7.821e-03 2.655e-03 -2.946 0.003221 **
## as.character(countryregion)2 -3.865e-02 1.655e-03 -23.356 < 2e-16 ***
## as.character(countryregion)3 -3.212e-02 2.081e-03 -15.438 < 2e-16 ***
## as.character(countryregion)4 -3.342e-02 1.613e-03 -20.720 < 2e-16 ***
## as.character(countryregion)5 -3.635e-02 1.848e-03 -19.671 < 2e-16 ***
## as.character(countryregion)6 -3.504e-02 1.636e-03 -21.418 < 2e-16 ***
## as.character(countryregion)7 -3.929e-02 1.742e-03 -22.550 < 2e-16 ***
## as.character(countryregion)8 -3.304e-02 1.803e-03 -18.327 < 2e-16 ***
## as.character(countryregion)9 -3.930e-02 1.761e-03 -22.321 < 2e-16 ***
## as.character(handedness)1 -3.590e-02 9.677e-03 -3.710 0.000207 ***
## as.character(handedness)2 -2.917e-02 9.704e-03 -3.006 0.002651 **
## as.character(handedness)3 -1.045e-02 9.771e-03 -1.069 0.285010
## age -2.834e-03 8.073e-05 -35.102 < 2e-16 ***
## STEM -5.718e-03 8.948e-04 -6.391 1.65e-10 ***
## education:AQ_full -2.949e-03 1.301e-04 -22.668 < 2e-16 ***
## as.character(sex)2:AQ_full -4.855e-03 2.459e-04 -19.745 < 2e-16 ***
## as.character(sex)2:education 6.418e-04 6.429e-04 0.998 0.318145
## education:age 6.184e-04 2.301e-05 26.870 < 2e-16 ***
## AQ_full:age -2.116e-04 1.002e-05 -21.130 < 2e-16 ***
## as.character(sex)2:age 6.782e-04 4.929e-05 13.759 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0494977)
##
## Null deviance: 34646 on 671568 degrees of freedom
## Residual deviance: 33240 on 671541 degrees of freedom
## (34 observations deleted due to missingness)
## AIC: -112761
##
## Number of Fisher Scoring iterations: 2
#Sex distribution
controls = subset(data2, autism == "0")
fem_controls = subset(controls, sex == "2")
cat("total number of females is", nrow(fem_controls))
## total number of females is 393600
extremeE = subset(fem_controls, braintype == "1")
E = subset(fem_controls, braintype == "2")
B = subset(fem_controls, braintype == "3")
S = subset(fem_controls, braintype == "4")
extremeS = subset(fem_controls, braintype == "5")
cat("percentage of females with extreme E is", (nrow(extremeE)/nrow(fem_controls))*100)
## percentage of females with extreme E is 2.893293
cat("percentage of females with E is", (nrow(E)/nrow(fem_controls))*100 )
## percentage of females with E is 40.00864
cat("percentage of females with B is", (nrow(B)/nrow(fem_controls))*100)
## percentage of females with B is 29.81326
cat("percentage of females with S is", (nrow(S)/nrow(fem_controls))*100)
## percentage of females with S is 25.58994
cat("percentage of females with Extreme S is", (nrow(extremeS)/nrow(fem_controls))*100)
## percentage of females with Extreme S is 1.694868
#Sex distribution
controls = subset(data2, autism == "0")
male_controls = subset(controls, sex == "1")
cat("total number of males is", nrow(male_controls))
## total number of males is 241355
extremeE = subset(male_controls, braintype == "1")
E = subset(male_controls, braintype == "2")
B = subset(male_controls, braintype == "3")
S = subset(male_controls, braintype == "4")
extremeS = subset(male_controls, braintype == "5")
cat("percentage of males with extreme E is", (nrow(extremeE)/nrow(male_controls))*100)
## percentage of males with extreme E is 0.7532473
cat("percentage of males with E is", (nrow(E)/nrow(male_controls))*100 )
## percentage of males with E is 23.87562
cat("percentage of males with B is", (nrow(B)/nrow(male_controls))*100)
## percentage of males with B is 30.98962
cat("percentage of males with S is", (nrow(S)/nrow(male_controls))*100)
## percentage of males with S is 40.23617
cat("percentage of males with Extreme S is", (nrow(extremeS)/nrow(male_controls))*100)
## percentage of males with Extreme S is 4.145346
#Sex distribution
cases = subset(data2, autism == "1")
fem_cases = subset(cases, sex == "2")
cat("total number of females is", nrow(fem_cases))
## total number of females is 18460
extremeE = subset(fem_cases, braintype == "1")
E = subset(fem_cases, braintype == "2")
B = subset(fem_cases, braintype == "3")
S = subset(fem_cases, braintype == "4")
extremeS = subset(fem_cases, braintype == "5")
cat("percentage of females with extreme E is", (nrow(extremeE)/nrow(fem_cases))*100)
## percentage of females with extreme E is 0.9317443
cat("percentage of females with E is", (nrow(E)/nrow(fem_cases))*100 )
## percentage of females with E is 22.19935
cat("percentage of females with B is", (nrow(B)/nrow(fem_cases))*100)
## percentage of females with B is 27.026
cat("percentage of females with S is", (nrow(S)/nrow(fem_cases))*100)
## percentage of females with S is 42.29144
cat("percentage of females with Extreme S is", (nrow(extremeS)/nrow(fem_cases))*100)
## percentage of females with Extreme S is 7.551463
#Sex distribution
cases = subset(data2, autism == "1")
male_cases = subset(cases, sex == "1")
cat("total number of males is", nrow(male_cases))
## total number of males is 18188
extremeE = subset(male_cases, braintype == "1")
E = subset(male_cases, braintype == "2")
B = subset(male_cases, braintype == "3")
S = subset(male_cases, braintype == "4")
extremeS = subset(male_cases, braintype == "5")
cat("percentage of females with extreme E is", (nrow(extremeE)/nrow(male_cases))*100)
## percentage of females with extreme E is 0.3023972
cat("percentage of females with E is", (nrow(E)/nrow(male_cases))*100 )
## percentage of females with E is 13.37145
cat("percentage of females with B is", (nrow(B)/nrow(male_cases))*100)
## percentage of females with B is 23.92237
cat("percentage of females with S is", (nrow(S)/nrow(male_cases))*100)
## percentage of females with S is 50.97867
cat("percentage of females with Extreme S is", (nrow(extremeS)/nrow(male_cases))*100)
## percentage of females with Extreme S is 11.42512
m = aov(data=controls, AQ_full~as.character(braintype))
anova(m)
## Analysis of Variance Table
##
## Response: AQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(braintype) 4 1223971 305993 96889 < 2.2e-16 ***
## Residuals 634950 2005284 3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD((m), "as.character(braintype)")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = AQ_full ~ as.character(braintype), data = controls)
##
## $`as.character(braintype)`
## diff lwr upr p adj
## 2-1 0.9440355 0.9005766 0.9874945 0
## 3-1 2.1152719 2.0716630 2.1588808 0
## 4-1 3.8655434 3.8219748 3.9091119 0
## 5-1 6.3070507 6.2505831 6.3635184 0
## 3-2 1.1712363 1.1560195 1.1864531 0
## 4-2 2.9215078 2.9064071 2.9366085 0
## 5-2 5.3630152 5.3240484 5.4019820 0
## 4-3 1.7502715 1.7347446 1.7657984 0
## 5-3 4.1917788 4.1526449 4.2309128 0
## 5-4 2.4415074 2.4024184 2.4805963 0
m = aov(data=controls, SPQ_full~as.character(braintype))
anova(m)
## Analysis of Variance Table
##
## Response: SPQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(braintype) 4 2584807 646202 22970 < 2.2e-16 ***
## Residuals 634950 17862955 28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD((m), "as.character(braintype)")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SPQ_full ~ as.character(braintype), data = controls)
##
## $`as.character(braintype)`
## diff lwr upr p adj
## 2-1 2.169472 2.039764 2.299181 0
## 3-1 3.923305 3.793149 4.053461 0
## 4-1 6.269979 6.139944 6.400014 0
## 5-1 10.221173 10.052639 10.389707 0
## 3-2 1.753832 1.708416 1.799249 0
## 4-2 4.100507 4.055437 4.145577 0
## 5-2 8.051701 7.935400 8.168002 0
## 4-3 2.346674 2.300332 2.393016 0
## 5-3 6.297868 6.181068 6.414668 0
## 5-4 3.951194 3.834528 4.067860 0
controls2 = controls[,c ("braintype", "AQ_full", "SPQ_full" )]
controls2 = controls2[!is.na(controls2$AQ_full),]
controls2_melt = melt(controls2, id.vars=c("braintype"))
ddply(controls2_melt, c("braintype", "variable"), summarise,
mean = mean(value), sd = sd(value),
sem = sd(value)/sqrt(length(value)))
## braintype variable mean sd sem
## 1 1 AQ_full 0.9918219 1.002425 0.008723006
## 2 1 SPQ_full 10.3633197 5.345941 0.046519871
## 3 2 AQ_full 1.9358574 1.432427 0.003088540
## 4 2 SPQ_full 12.5327919 5.241795 0.011302144
## 5 3 AQ_full 3.1070938 1.744665 0.003980184
## 6 3 SPQ_full 14.2866243 5.244543 0.011964615
## 7 4 AQ_full 4.8573653 2.135523 0.004801244
## 8 4 SPQ_full 16.6332986 5.410782 0.012164928
## 9 5 AQ_full 7.2988726 1.953192 0.015125123
## 10 5 SPQ_full 20.5844927 5.469007 0.042350893
sexdiff = matrix(c(18230, 241604, 18515, 393930), ncol = 2, byrow = TRUE)
colnames(sexdiff) = c("Cases", "Controls")
rownames(sexdiff) = c("Males", "Females")
sexdiff
## Cases Controls
## Males 18230 241604
## Females 18515 393930
chisq.test(sexdiff)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: sexdiff
## X-squared = 1969.5, df = 1, p-value < 2.2e-16
data3 = subset(data, repeat. == 0) #695166
data3$autism = ifelse(data3$diagnosis_0 == "2" | data3$diagnosis_1 == "2"| data3$diagnosis_3 == "2" | data3$diagnosis_4 == "2" | data3$diagnosis_5 == "2" | data3$diagnosis_6 == "2" | data3$diagnosis_7 == "2" | data3$diagnosis_8 == "2" | data3$autism_diagnosis_0 > 0 | data3$autism_diagnosis_1 > 0 | data3$autism_diagnosis_2 > 0, 1, 0)
data3$autism[is.na(data3$autism)] <- 0
cases_2 = subset(data3, autism == "1")
controls_2 = subset(data3, autism == "0")
controls1 = subset(controls_2, sex == "1")
controls2 = subset(controls_2, sex == "2")
controls3 = subset(controls_2, sex == "3")
a = nrow(controls1)
b = nrow(controls2)
c = nrow(controls3)
cases1 = subset(cases_2, sex == "1")
cases2 = subset(cases_2, sex == "2")
cases3 = subset(cases_2, sex == "3")
d = nrow(cases1)
e = nrow(cases2)
f = nrow(cases3)
sex_3_way = matrix(c(a, d, b, e, c, f ), ncol = 2, byrow = TRUE)
colnames(sex_3_way) = c("Controls", "Cases")
rownames(sex_3_way) = c("Males", "Females", "Other")
sex_3_way
## Controls Cases
## Males 241604 18230
## Females 393930 18515
## Other 2875 978
chisq.test(sex_3_way)
##
## Pearson's Chi-squared test
##
## data: sex_3_way
## X-squared = 4817.1, df = 2, p-value < 2.2e-16
sex_2_way = matrix(c(d+e, a+b, f, c ), ncol = 2, byrow = TRUE)
colnames(sex_2_way) = c("Cases", "Controls")
rownames(sex_2_way) = c("Binary", "Non-binary")
sex_2_way
## Cases Controls
## Binary 36745 635534
## Non-binary 978 2875
chisq.test(sex_2_way)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: sex_2_way
## X-squared = 2881.1, df = 1, p-value < 2.2e-16
#ANOVA with handedness
###AQ###
cat("handedness AQ results")
## handedness AQ results
m = aov(data=controls, AQ_full~as.character(handedness))
anova(m)
## Analysis of Variance Table
##
## Response: AQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(handedness) 3 11032 3677.3 725.52 < 2.2e-16 ***
## Residuals 634942 3218199 5.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD((m), "as.character(handedness)")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = AQ_full ~ as.character(handedness), data = controls)
##
## $`as.character(handedness)`
## diff lwr upr p adj
## 1-0 -1.15234475 -1.42125375 -0.8834358 0.00e+00
## 2-0 -1.06600812 -1.33564396 -0.7963723 0.00e+00
## 3-0 -0.47424075 -0.74570118 -0.2027803 4.25e-05
## 2-1 0.08633663 0.06362319 0.1090501 0.00e+00
## 3-1 0.67810400 0.63933311 0.7168749 0.00e+00
## 3-2 0.59176738 0.54824016 0.6352946 0.00e+00
tky = as.data.frame(TukeyHSD(m)$"as.character(handedness)")
tky$pair = rownames(tky)
tky$`p adj` = ifelse(tky$`p adj` == 0, 2.2e-16, tky$`p adj`)
# Plot pairwise TukeyHSD comparisons and color by significance level
ggplot(tky, aes(colour=cut(`p adj`, c(0, 0.01, 0.05, 1),
label=c("p<0.01","p<0.05","Non-Sig")))) +
geom_hline(yintercept=0, lty="11", colour="grey30") +
geom_errorbar(aes(pair, ymin=lwr, ymax=upr), width=0.2) +
geom_point(aes(pair, diff)) +
labs(colour="") +labs(x = "Difference", y = "Comparisons") + theme(legend.position="none") + theme_minimal()
###EQ###
cat("handedness EQ results")
## handedness EQ results
m = aov(data=controls, EQ_full~as.character(handedness))
anova(m)
## Analysis of Variance Table
##
## Response: EQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(handedness) 3 17894 5964.6 248.36 < 2.2e-16 ***
## Residuals 634942 15249059 24.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD((m), "as.character(handedness)")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = EQ_full ~ as.character(handedness), data = controls)
##
## $`as.character(handedness)`
## diff lwr upr p adj
## 1-0 1.9037164 1.3183599 2.4890729 0e+00
## 2-0 1.6122419 1.0253033 2.1991806 0e+00
## 3-0 1.1732242 0.5823138 1.7641346 2e-06
## 2-1 -0.2914745 -0.3409167 -0.2420323 0e+00
## 3-1 -0.7304922 -0.8148880 -0.6460964 0e+00
## 3-2 -0.4390177 -0.5337670 -0.3442684 0e+00
tky = as.data.frame(TukeyHSD(m)$"as.character(handedness)")
tky$pair = rownames(tky)
tky$`p adj` = ifelse(tky$`p adj` == 0, 2.2e-16, tky$`p adj`)
# Plot pairwise TukeyHSD comparisons and color by significance level
ggplot(tky, aes(colour=cut(`p adj`, c(0, 0.01, 0.05, 1),
label=c("p<0.01","p<0.05","Non-Sig")))) +
geom_hline(yintercept=0, lty="11", colour="grey30") +
geom_errorbar(aes(pair, ymin=lwr, ymax=upr), width=0.2) +
geom_point(aes(pair, diff)) +
labs(colour="") +labs(x = "Difference", y = "Comparisons") + theme(legend.position="none") + theme_minimal()
###SQ###
cat("handedness SQ results")
## handedness SQ results
m = aov(data=controls, SQ_full~as.character(handedness))
anova(m)
## Analysis of Variance Table
##
## Response: SQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(handedness) 3 105051 35017 2163.5 < 2.2e-16 ***
## Residuals 634942 10276637 16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD((m), "as.character(handedness)")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SQ_full ~ as.character(handedness), data = controls)
##
## $`as.character(handedness)`
## diff lwr upr p adj
## 1-0 -2.2470011 -2.72753541 -1.7664667 0.0000000
## 2-0 -2.1402850 -2.62211815 -1.6584518 0.0000000
## 3-0 -0.0968786 -0.58197230 0.3882151 0.9560049
## 2-1 0.1067161 0.06612772 0.1473045 0.0000000
## 3-1 2.1501225 2.08083975 2.2194052 0.0000000
## 3-2 2.0434064 1.96562420 2.1211885 0.0000000
tky = as.data.frame(TukeyHSD(m)$"as.character(handedness)")
tky$pair = rownames(tky)
tky$`p adj` = ifelse(tky$`p adj` == 0, 2.2e-16, tky$`p adj`)
# Plot pairwise TukeyHSD comparisons and color by significance level
ggplot(tky, aes(colour=cut(`p adj`, c(0, 0.01, 0.05, 1),
label=c("p<0.01","p<0.05","Non-Sig")))) +
geom_hline(yintercept=0, lty="11", colour="grey30") +
geom_errorbar(aes(pair, ymin=lwr, ymax=upr), width=0.2) +
geom_point(aes(pair, diff)) +
labs(colour="") +labs(x = "Difference", y = "Comparisons") + theme(legend.position="none") + theme_minimal()
###SPQ##
cat("handedness SPQ results")
## handedness SPQ results
m = aov(data=controls, SPQ_full~as.character(handedness))
anova(m)
## Analysis of Variance Table
##
## Response: SPQ_full
## Df Sum Sq Mean Sq F value Pr(>F)
## as.character(handedness) 3 199691 66564 2087.3 < 2.2e-16 ***
## Residuals 634942 20247766 32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD((m), "as.character(handedness)")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SPQ_full ~ as.character(handedness), data = controls)
##
## $`as.character(handedness)`
## diff lwr upr p adj
## 1-0 -1.6009632 -2.2754718 -0.926454634 0.0000000
## 2-0 -1.6504084 -2.3267401 -0.974076690 0.0000000
## 3-0 1.3721611 0.6912527 2.053069447 0.0000013
## 2-1 -0.0494452 -0.1064176 0.007527249 0.1152571
## 3-1 2.9731243 2.8758746 3.070373908 0.0000000
## 3-2 3.0225695 2.9133895 3.131749456 0.0000000
tky = as.data.frame(TukeyHSD(m)$"as.character(handedness)")
tky$pair = rownames(tky)
tky$`p adj` = ifelse(tky$`p adj` == 0, 2.2e-16, tky$`p adj`)
# Plot pairwise TukeyHSD comparisons and color by significance level
ggplot(tky, aes(colour=cut(`p adj`, c(0, 0.01, 0.05, 1),
label=c("p<0.01","p<0.05","Non-Sig")))) +
geom_hline(yintercept=0, lty="11", colour="grey30") +
geom_errorbar(aes(pair, ymin=lwr, ymax=upr), width=0.2) +
geom_point(aes(pair, diff)) +
labs(colour="") +labs(x = "Difference", y = "Comparisons") + theme(legend.position="none") + theme_minimal()
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
data2$category = ifelse(data2$sex == 1 & data2$autism == 0, "control_males", "NA")
data2$category = ifelse(data2$sex == 2 & data2$autism == 0, "control_females", data2$category)
data2$category = ifelse(data2$sex == 2 & data2$autism == 1, "autism_females", data2$category)
data2$category = ifelse(data2$sex == 1 & data2$autism == 1, "autism_males", data2$category)
#CDFfigure
cdf_plot = ggplot(data2, aes(wheelwrightD, colour = category)) + stat_ecdf() +xlab("Dscore") + ylab("Cumulative frequency") + theme_classic()
#Case-control by sex Figure
a = ggplot(data2, aes(x=AQ_full, colour=as.character(category))) + geom_density(adjust = 4) + theme_classic() + xlab("AQ-10 scores") + scale_x_continuous(breaks=seq(0,11,2))
b = ggplot(data2, aes(x=EQ_full, colour=as.character(category))) + geom_density(adjust = 2) + theme_classic() + xlab("EQ-10 scores") + scale_x_continuous(breaks=seq(0,41,2))
c = ggplot(data2, aes(x=SQ_full, colour=as.character(category))) + geom_density(adjust = 2) + theme_classic() + xlab("SQ-10 scores") + scale_x_continuous(breaks=seq(0,41,2))
d = ggplot(data2, aes(x=SPQ_full, colour=as.character(category))) + geom_density(adjust = 2) + theme_classic() + xlab("SPQ-10 scores") + scale_x_continuous(breaks=seq(0,31,5))
case_control_plot = multiplot(a, b, c, d, cols=2)
#STEM figure
controls2 = controls[!is.na(controls$STEM),]
controls2$category = ifelse(controls2$sex == 1 & controls2$STEM == 0, "nonSTEM_males", "NA")
controls2$category = ifelse(controls2$sex == 2 & controls2$STEM == 0, "nonSTEM_females", controls2$category)
controls2$category = ifelse(controls2$sex == 1 & controls2$STEM == 1, "STEM_males", controls2$category)
controls2$category = ifelse(controls2$sex == 2 & controls2$STEM == 1, "STEM_females", controls2$category)
a = ggplot(controls2, aes(x=AQ_full, colour=as.character(category))) + geom_density(adjust = 4) + theme_classic() + xlab("AQ-10 scores") + scale_x_continuous(breaks=seq(0,11,2))
b = ggplot(controls2, aes(x=EQ_full, colour=as.character(category))) + geom_density(adjust = 2) + theme_classic() + xlab("EQ-10 scores") + scale_x_continuous(breaks=seq(0,41,2))
c = ggplot(controls2, aes(x=SQ_full, colour=as.character(category))) + geom_density(adjust = 2) + theme_classic() + xlab("SQ-10 scores") + scale_x_continuous(breaks=seq(0,41,2))
d = ggplot(controls2, aes(x=SPQ_full, colour=as.character(category))) + geom_density(adjust = 2) + theme_classic() + xlab("SPQ-10 scores") + scale_x_continuous(breaks=seq(0,31,5))
stem_diff_plot = multiplot(a, b, c, d, cols=2)